import time
from urllib.parse import urljoin

import requests
from lxml import etree



def save_content(title='章节', content=''):
    """Append one chapter to the output text file.

    The chapter is written as the title, a blank line, then the body, to
    ``灵境行者.txt`` in the current working directory (UTF-8, append mode).
    Saving is best-effort: a failure is reported on stdout and swallowed, so
    this function never raises to its caller.

    Args:
        title: Chapter heading written before the body.
        content: Chapter body text.

    Returns:
        None.
    """
    try:
        with open('灵境行者.txt', 'a+', encoding='utf-8') as fw:
            fw.write(title + '\n\n' + content)
    except Exception as err:
        # Deliberately broad: a failed save is reported but must not abort
        # whatever loop is calling this function.
        print(f'save err ==> {err}')
    else:
        # Only announce success when the write actually went through.
        print('---------------------------- save done.')



def request_url(url):
    """Download one chapter page, save it, and return the next-page link.

    The page is fetched with a browser-like User-Agent and decoded as GBK.
    The chapter title is read from ``div.bookname > h1`` and the body from
    the text nodes of ``div#content``; both are handed to ``save_content``
    and echoed to stdout. This function is best-effort: every failure is
    reported on stdout and results in an empty return value rather than an
    exception.

    Args:
        url: Absolute URL of the chapter page to fetch.

    Returns:
        The ``href`` of the fourth link in the ``bottem1`` navigation block,
        or ``''`` when the request or parsing fails or that link is absent.
    """
    next_url = ''
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
    }

    try:
        # Without a timeout a stalled connection would block the crawl forever.
        resp = requests.get(url=url, headers=headers, timeout=10)
        # Treat HTTP error pages as failures instead of trying to parse them.
        resp.raise_for_status()
        # Set the charset to match the page (utf-8/gbk); gbk is used here.
        resp.encoding = 'gbk'

        # Content extraction
        e = etree.HTML(resp.text)
        if e is None:
            print(f'request err ==> empty document: {url}')
            return next_url

        titles = e.xpath('//div[@class="bookname"]/h1/text()')
        if not titles:
            print(f'request err ==> chapter title not found: {url}')
            return next_url
        title = titles[0]

        # A missing "next" link must not discard the chapter itself, so it
        # is looked up defensively and the chapter is still saved below.
        links = e.xpath('//div[@id="content"]/../..//div[@class="bottem1"]/a[4]/@href') if False else e.xpath('//div[@class="bottem1"]/a[4]/@href')
        if links:
            next_url = links[0]

        content = e.xpath('//div[@id="content"]/text()')
        content = ''.join(x for x in content if x != '\r\n')

        # Strip the padding produced by &nbsp; entities. The parser decodes
        # them to U+00A0 (no-break space), so that character is removed
        # explicitly in addition to ordinary spaces.
        content = content.replace('\xa0', '').replace(' ', '')
        save_content(title, content)

        print('title ==> {}'.format(title))
        print(f'body ==> {content}')
        print(f'next ==> {next_url}')
        print('-------------------- request done.')
    except Exception as err:
        # Deliberately broad so one bad page cannot crash the caller, but the
        # cause is reported instead of being silently dropped.
        print(f'request err ==> {err}')

    return next_url


def run():
    """Crawl chapters one after another, starting from a fixed first page.

    Each page is processed by ``request_url``, whose return value is taken
    as the link to the following page. The loop ends when that value is
    empty or does not contain ``.html``.
    """
    base_url = 'https://www.82zg.com'
    page_url = '/book/72250/29220516.html'

    url = urljoin(base_url, page_url)
    while True:
        resp = request_url(url)
        print(resp)
        # An empty result means the fetch failed or no link was found; a link
        # without ".html" is not treated as a chapter page.
        if not resp or '.html' not in resp:
            break
        # Resolve against the current page so root-relative, document-relative
        # and absolute hrefs all yield a valid URL.
        url = urljoin(url, resp)
        # Brief pause between consecutive requests.
        time.sleep(0.01)


# Start the crawl only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    run()